library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(here)
## Warning: package 'here' was built under R version 4.1.3
## here() starts at C:/Users/mahri/OneDrive/CodeClan/rshiny_dashboard_project/Work In Progress/Demographics
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.3
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(ggplot2)
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.1.3
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(stringr)
hosp_activity_agesex <- read_csv(here("../../raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_age_and_sex.csv"))
## Rows: 129599 Columns: 18
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (12): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl (6): Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, LengthOf...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
hosp_activity_simd <-
read_csv(here("../../raw_data/inpatient_and_daycase_by_nhs_board_of_treatment_and_simd.csv"))
## Rows: 40894 Columns: 18
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (11): Quarter, QuarterQF, HB, HBQF, Location, LocationQF, AdmissionType,...
## dbl (7): SIMD, Episodes, LengthOfEpisode, AverageLengthOfEpisode, Stays, Le...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
hosp_activity_agesex <- janitor::clean_names(hosp_activity_agesex)
# glimpse(hosp_activity_agesex)
hosp_activity_simd <- janitor::clean_names(hosp_activity_simd)
# glimpse(hosp_acivity_simd)
We only need ACUTE patients, what do we have:
admission_type: * Elective Inpatients (not acute) * Emergency
Inpatients
* Transfers (not acute)
* All Day cases (not acute)
* All Inpatients (not necasseraly acute)
* All Inpatients and Day cases (not acute)
* Not Specified (not acute???)
hosp_activity_agesex %>%
group_by(admission_type) %>%
summarise(total = n())
hosp_activity_simd %>%
group_by(admission_type) %>%
summarise(total = n())
removing those that aren’t acute (see above)
hosp_acute_activity_agesex <- hosp_activity_agesex %>%
filter(admission_type == "Emergency Inpatients")
hosp_acute_activity_simd <- hosp_activity_simd %>%
filter(admission_type == "Emergency Inpatients")
Age
hosp_acute_activity_agesex %>%
group_by(age) %>%
summarise(total_patients_in_age_group = n())
# 10 year increments i.e. 0-9, 10-19... 80-89, 90 years and over
# No NA values
hosp_acute_activity_agesex %>%
mutate(age = is.na(age)) %>%
filter(age == TRUE)
Sex
hosp_acute_activity_agesex %>%
group_by(sex) %>%
summarise(total_patients_in_sex_group = n())
# Female: 9,832 Male: 9,799
# No NA values
hosp_acute_activity_agesex %>%
mutate(sex = is.na(age)) %>%
filter(sex == TRUE)
SIMD
hosp_acute_activity_simd %>%
group_by(simd) %>%
summarise(total = n())
#There are 962 NAs and ~ 1000 of levels 1:5... remember to tidy this in analysis
Remember there were 7 admission types… I chose to keep one.
Might want to make use of this if we’re not sure about admission
types:
acute_target <- c("Emergency Inpatients", "Not Specified")
Others = Elective Inpatients, Transfers, All Day cases, All Inpatients, All Inpatients and Day cases, Not Specified
# do I want to use total length of stay or total average_length_of_stay
# need a total column:
# sometimes stay = 0, and length of stay = 5
#Male
total_stays_sex_males <- hosp_acute_activity_agesex %>%
filter(sex == "Male") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_sex = sum(stays))
total_stays_sex_males
#Female
total_stays_sex_females <- hosp_acute_activity_agesex %>%
filter(sex == "Female") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_sex = sum(stays))
total_stays_sex_females
Bind males and females:
hosp_acute_activity_sex_total_stays <- bind_rows(total_stays_sex_females, total_stays_sex_males)
hosp_acute_activity_sex_total_stays
ALSO WANT TO BIND FOR AGE GROUPS
hosp_acute_activity_sex_total_stays %>%
group_by(age) %>%
summarise(total = n())
# 0-9 years
age_0_9 <- hosp_acute_activity_sex_total_stays %>%
filter(age == "0-9 years") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
age_0_9
# 10-19 years
age_10_19 <- hosp_acute_activity_sex_total_stays %>%
filter(age == "10-19 years") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
age_10_19
# 20-29 years
age_20_29 <- hosp_acute_activity_sex_total_stays %>%
filter(age == "20-29 years") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
age_20_29
# 30-39 years
age_30_39 <- hosp_acute_activity_sex_total_stays %>%
filter(age == "30-39 years") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
age_30_39
# 40-49 years
age_40_49 <- hosp_acute_activity_sex_total_stays %>%
filter(age == "40-49 years") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
age_40_49
# 20-29 years
age_50_59 <- hosp_acute_activity_sex_total_stays %>%
filter(age == "50-59 years") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
age_50_59
# 60-69 years
age_60_69 <- hosp_acute_activity_sex_total_stays %>%
filter(age == "60-69 years") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
age_60_69
# 70-79 years
age_70_79 <- hosp_acute_activity_sex_total_stays %>%
filter(age == "70-79 years") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
age_70_79
# 80-89 years
age_80_89 <- hosp_acute_activity_sex_total_stays %>%
filter(age == "80-89 years") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
age_80_89
# 90 years and over
age_90_plus <- hosp_acute_activity_sex_total_stays %>%
filter(age == "90 years and over") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_age = sum(stays))
age_90_plus
Bind these together
hosp_acute_activity_total_stays <- bind_rows(age_0_9, age_10_19, age_20_29, age_30_39, age_40_49, age_50_59, age_60_69, age_70_79, age_80_89, age_90_plus)
hosp_acute_activity_total_stays
hosp_acute_activity_total_stays %>%
ggplot()+
aes(x = quarter,
y = total_stays_per_quarter_sex,
group = sex, colour = sex) +
geom_line() +
labs(x = "Yearly Quarter",
y = "Total Stays",
title = "Total Emergency Inpatient Stays Across Scotland by Sex",
subtitle = "Q3, 2016 - Q3, 2021",
colour = "Sex") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))
hosp_acute_activity_total_stays %>%
ggplot()+
aes(x = quarter,
y = total_stays_per_quarter_age,
group = age, colour = age) +
geom_point()+
geom_line() +
labs(x = "Yearly Quarter",
y = "Total Stays",
title = "Total Emergency Inpatient Stays Across Scotland by Age Group",
subtitle = "Q3, 2016 - Q3, 2021",
colour = "Age") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))
SIMD (Scottish Index of Multiple Deprivation)
hosp_acute_activity_simd %>%
group_by(simd) %>%
summarise(total = n())
#There are 962 NAs and ~ 1000 of levels 1:5... remember to tidy this in analysis
Drop NAs
hosp_acute_activity_simd_no_na <- hosp_acute_activity_simd %>%
drop_na(simd)
hosp_acute_activity_simd_no_na
# simd 1
simd1 <- hosp_acute_activity_simd_no_na %>%
filter(simd == "1") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_simd = sum(stays))
simd1
# simd 2
simd2 <- hosp_acute_activity_simd_no_na %>%
filter(simd == "2") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_simd = sum(stays))
simd2
# simd 3
simd3 <- hosp_acute_activity_simd_no_na %>%
filter(simd == "3") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_simd = sum(stays))
simd3
# simd 4
simd4 <- hosp_acute_activity_simd_no_na %>%
filter(simd == "4") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_simd = sum(stays))
simd4
# simd 5
simd5 <- hosp_acute_activity_simd_no_na %>%
filter(simd == "5") %>%
group_by(quarter) %>%
mutate(total_stays_per_quarter_simd = sum(stays))
simd5
bind smid
hosp_acute_activity_total_stays_simd <- bind_rows(simd1, simd2, simd3, simd4,
simd5)
hosp_acute_activity_total_stays_simd
hosp_acute_activity_total_stays_simd %>%
mutate(simd = fct_relevel(as.character(simd,
"1", "2", "3", "4", "5"))) %>%
ggplot()+
aes(x = quarter,
y = total_stays_per_quarter_simd,
group = simd, colour = simd) +
geom_point()+
geom_line() +
labs(x = "Yearly Quarter",
y = "Total Stays",
title = "Total Emergency Inpatient Stays Across Scotland by SIMD Level",
subtitle = "Q3, 2016 - Q3, 2021",
colour = "SIMD Level:
1 = Most Deprived
5 = Least Deprived") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))
# simd 1
simd1e <- hosp_acute_activity_total_stays_simd %>%
filter(simd == "1") %>%
group_by(quarter) %>%
mutate(total_episodes_per_quarter_simd = sum(episodes))
simd1e
# simd 2
simd2e <- hosp_acute_activity_total_stays_simd %>%
filter(simd == "2") %>%
group_by(quarter) %>%
mutate(total_episodes_per_quarter_simd = sum(episodes))
simd2e
# simd 3
simd3e <- hosp_acute_activity_total_stays_simd %>%
filter(simd == "3") %>%
group_by(quarter) %>%
mutate(total_episodes_per_quarter_simd = sum(episodes))
simd3e
# simd 4
simd4e <- hosp_acute_activity_total_stays_simd %>%
filter(simd == "4") %>%
group_by(quarter) %>%
mutate(total_episodes_per_quarter_simd = sum(episodes))
simd4e
# simd 5
simd5e <- hosp_acute_activity_total_stays_simd %>%
filter(simd == "5") %>%
group_by(quarter) %>%
mutate(total_episodes_per_quarter_simd = sum(episodes))
simd5e
hosp_acute_activity_total_stays_andEps_simd <- bind_rows(simd1e, simd2e, simd3e,
simd4e, simd5e)
hosp_acute_activity_total_stays_andEps_simd %>%
mutate(simd = fct_relevel(as.character(simd,
"1", "2", "3", "4", "5"))) %>%
ggplot()+
aes(x = quarter,
y = total_episodes_per_quarter_simd,
group = simd, colour = simd) +
geom_point()+
geom_line() +
labs(x = "Yearly Quarter",
y = "Total Stays",
title = "Total Emergency Inpatient Episodes Across Scotland by SIMD Level",
subtitle = "Q3, 2016 - Q3, 2021",
colour = "SIMD Level:
1 = Most Deprived
5 = Least Deprived") +
theme_bw()+
theme(axis.text.x = element_text(angle=45, hjust=0.9))